Loading one-to-one orthologs between P. barbatus and C. floridanus
path_to_pogodata <- paste0(path_to_repo,"/data/gordon_data/")
pogo.cflo <-
read.csv(paste0(path_to_pogodata, "pogo_cflo_orthologs.csv"),
header = T, stringsAsFactors = F, na.strings = c(""," ",".", "NA")) %>%
as_tibble() %>%
select(cflo_gene,pogo_gene) %>%
distinct()
# make a function that takes pogo names and returns cflo names
pogo_to_cflo <- function(geneset) {
cflo_genes <-
pogo.cflo %>%
filter(pogo_gene %in% geneset) %>%
pull(cflo_gene) %>%
unique() %>%
as.character()
return(cflo_genes)
}
# make a function that takes pogo names and returns cflo names
cflo_to_pogo <- function(geneset) {
pogo_genes <-
pogo.cflo %>%
filter(cflo_gene %in% geneset) %>%
pull(pogo_gene) %>%
unique() %>%
as.character()
return(pogo_genes)
}
Using time-course RNASeq data from healthy Cflo forager heads:
Datasets:
# Specify the path to TC5 database
path_to_tc5_repo = "~/Documents/GitHub/Das_et_al_2021"
# Load the TC5 database
tc5.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_tc5_repo,"/data/TC5_data.db"))
# Specify the path to the result-summary from de Bekker and Das 2021
path_to_tc5_results <- paste0(path_to_tc5_repo, "/results/gene_lists/")
## load genes of interest
cflo.for24nur8 <- read.csv(paste0(path_to_tc5_results, "for24nur8_all.csv")) %>% pull(gene_name) %>% unique()
cflo.for24 <- read.csv(paste0(path_to_tc5_results, "for24_all.csv")) %>% pull(gene_name) %>% unique()
cflo.for12 <- read.csv(paste0(path_to_tc5_results, "for12_all.csv")) %>% pull(gene_name) %>% unique()
cflo.for8 <- read.csv(paste0(path_to_tc5_results, "for8_all.csv")) %>% pull(gene_name) %>% unique()
# SAMPLE NAME
## specify sample name
sample.name <- c("pbar_ld","pbar_dd")
## SPECIFY THE DATASET
which.sample <- sample.name[1]
writeLines(paste0("Selected Dataset: ", which.sample))
## Selected Dataset: pbar_ld
# LOAD DATABASES (TC9)
## 1. TC9_ejtk.db
ejtk.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_repo, "/data/databases/TC9_ejtk.db"))
## 2. TC9_data.db
data.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_repo, "/data/databases/TC9_data.db"))
# extract the (gene-expr X time-point-of-sampling) data
dat <-
data.db %>%
tbl(., paste0(which.sample ,"_fpkm")) %>%
select(gene_name, everything()) %>%
collect()
writeLines(
"What is the dimensions of the original dataset?
[Rows = #genes, Cols = #samples]"
)
## What is the dimensions of the original dataset?
## [Rows = #genes, Cols = #samples]
dim(dat[-1])
## [1] 12557 12
The above dataset contains all genes (n=12,557) in the ant genome. However, not all of these genes are expressed in the ant head, and some are expressed at very low levels that are not biologically meaningful. Therefore, we will only keep the genes that are “expressed” (≥1 FPKM for at least half of all time points) in the ant head.
# Which genes are expressed throughout the day in forager heads?
# count the number of time points that has ≥ 1 FPKM
# subset the data and only keep the filtered genes
# arguments
min_expression = 1
min_timepoints = 6
dat <- dat |>
## to do ----
## convert into function
mutate(
n_samples = rowSums(
across(
!gene_name,
~ . >= min_expression
),
na.rm = TRUE
)
) |>
filter(
n_samples >= min_timepoints
) |>
select(
- n_samples
)
writeLines("Dimensions of the data post-filtering step [Rows = #genes, Cols = #samples]")
## Dimensions of the data post-filtering step [Rows = #genes, Cols = #samples]
dim(dat)
## [1] 9925 13
This is our cleaned, input data file for building the circadian GCN.
datExpr = as.data.frame(t(log2(dat[-c(1)]+1)))
names(datExpr) = dat$gene_name
rownames(datExpr) = names(dat)[-c(1)]
# USE THE FOLLOWING CODE TO CHECK IF YOU HAVE ANY BAD SAMPLES #
# gsg = goodSamplesGenes(datExpr, verbose = 3);
# gsg$allOK
# sampleTree = hclust(dist(datExpr0), method = "average");
# # Plot the sample tree: Open a graphic output window of size 12 by 9 inches
# # The user should change the dimensions if the window is too large or too small.
# sizeGrWindow(12,9)
# #pdf(file = "Plots/sampleClustering.pdf", width = 12, height = 9);
# par(cex = 1);
# par(mar = c(0,4,2,0))
# plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,
# cex.axis = 1.5, cex.main = 2)
# save the number of genes and samples
# that will be used to create the circadian GCN
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
writeLines("Visualizing the log-transformed data")
## Visualizing the log-transformed data
datExpr |>
rownames_to_column(
"sample"
) |>
as_tibble() |>
tidyr::pivot_longer(
cols = !sample,
names_to = "gene_id",
values_to = "log2_fpkm"
) |>
mutate(
sample = stringr::str_remove(sample, "ZT")
) |>
ggplot(
aes(
x = log2_fpkm,
# color = sample,
fill = sample
)
) +
geom_density(
position = "stack"
) +
theme_Publication(25) +
scale_fill_manual(
values = viridis::viridis(nSamples)
) +
theme(
legend.position = "bottom",
legend.justification = "right"
) +
guides(
fill = guide_legend(
nrow = 3,
byrow=TRUE
)
)
# Calculate Kendall's tau-b correlation for each gene-gene pair
# sim_matrix <- cor((datExpr), method = "kendall") # this step takes time
# save(sim_matrix,
# file = paste0(path_to_repo, "/results/wgcna_files/ref",
# which.sample,"sim_matrix_", sample.name[1], "_TC9.RData")) # might be useful to save the sim_matrix and
load(paste0(
path_to_repo,
"/results/wgcna_files/ref",
which.sample,
"sim_matrix_",
sample.name[1],
"_TC9.RData"
)) # load it up
## Let's display a chunk of the matrix (code from Hughitt 2016; github)
heatmap_indices <- sample(nrow(sim_matrix), 200)
writeLines(paste0("Plotting a chunk of the gene-gene similarity matrix with ", length(heatmap_indices), " genes."))
## Plotting a chunk of the gene-gene similarity matrix with 200 genes.
gplots::heatmap.2(t(sim_matrix[heatmap_indices, heatmap_indices]),
col=inferno(100),
labRow=NA, labCol=NA,
trace='none', dendrogram='row',
xlab='Gene', ylab='Gene',
main= paste0("Similarity matrix \n correlation method = 'kendall' \n (", length(heatmap_indices), " random genes)"),
density.info='none', revC=TRUE)
writeLines("Performing network topology analysis to pick soft-thresholding power")
## Performing network topology analysis to pick soft-thresholding power
# Choose a set of soft-thresholding powers
powers = c(c(1:10), seq(from = 12, to=30, by=2))
# # Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 4507.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 4507 of 9925
## ..working on genes 4508 through 9014 of 9925
## ..working on genes 9015 through 9925 of 9925
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.703 2.540 0.937 3690.0 3800.00 5080
## 2 2 0.274 0.518 0.819 1940.0 2010.00 3330
## 3 3 0.065 -0.192 0.668 1180.0 1200.00 2450
## 4 4 0.350 -0.557 0.731 793.0 776.00 1910
## 5 5 0.505 -0.778 0.783 564.0 526.00 1550
## 6 6 0.579 -0.924 0.821 418.0 371.00 1280
## 7 7 0.616 -1.020 0.844 320.0 269.00 1080
## 8 8 0.636 -1.120 0.851 252.0 200.00 929
## 9 9 0.657 -1.190 0.863 202.0 151.00 806
## 10 10 0.676 -1.230 0.875 165.0 115.00 706
## 11 12 0.707 -1.300 0.892 115.0 70.70 555
## 12 14 0.713 -1.340 0.887 83.2 45.00 447
## 13 16 0.735 -1.350 0.892 62.5 29.60 367
## 14 18 0.764 -1.340 0.900 48.2 20.30 305
## 15 20 0.811 -1.310 0.924 38.0 14.20 258
## 16 22 0.838 -1.310 0.942 30.5 10.20 224
## 17 24 0.819 -1.400 0.937 24.9 7.46 203
## 18 26 0.818 -1.460 0.943 20.6 5.53 186
## 19 28 0.836 -1.480 0.960 17.2 4.14 170
## 20 30 0.853 -1.510 0.971 14.5 3.18 157
writeLines("Plotting the resutls from the network topology analysis")
## Plotting the resutls from the network topology analysis
# Plot the results:
# sizeGrWindow(9, 5)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.70,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
par(mfrow = c(1,1));
NOTE: The scale-free topology fit index reaches ~0.70 at a soft-thresholding-power=12.
Now, we can go ahead and create our adjacency matrix by power-transforming the similarity matrix (see WGCNA for more info).
## Specify the soft-thresholding-power
soft.power = 12
# Construct adjacency matrix
# adj_matrix <- adjacency.fromSimilarity(sim_matrix,
# power=soft.power,
# type='signed'
# )
# save(adj_matrix,
# file = paste0(path_to_repo, "/results/wgcna_files/ref",
# which.sample,"adj_matrix_", sample.name[1], "_TC9.RData"))
## Delete similarity matrix to free up memory
rm(sim_matrix)
gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 5357506 286.2 9813792 524.2 NA 9503946 507.6
## Vcells 10626677 81.1 246884106 1883.6 32768 481798582 3675.9
# might be useful to save the sim_matrix and
load(paste0(path_to_repo, "/results/wgcna_files/ref",
which.sample,"adj_matrix_", sample.name[1], "_TC9.RData"))
# load it up
# Convert adj_matrix to matrix
gene_ids <- rownames(adj_matrix)
adj_matrix <- matrix(adj_matrix, nrow=nrow(adj_matrix))
rownames(adj_matrix) <- gene_ids
colnames(adj_matrix) <- gene_ids
writeLines(paste0("Plotting the power-transformed adjacency matrix for the same ", length(heatmap_indices)," genes as above"))
## Plotting the power-transformed adjacency matrix for the same 200 genes as above
## Same heatmap as before, but now with the power-transformed adjacency matrix
gplots::heatmap.2(t(adj_matrix[heatmap_indices, heatmap_indices]),
col=inferno(100),
labRow=NA, labCol=NA,
trace='none', dendrogram='row',
xlab='Gene', ylab='Gene',
main='Adjacency matrix\n(post power transformation)',
density.info='none', revC=TRUE)
The following steps are performed as per guidelines from the WGCNA package and several tutorials made available online.
# # Turn adjacency into topological overlap
# TOM = TOMsimilarity(adj_matrix);
# dissTOM = 1-TOM
# save(dissTOM,
# file = paste0(path_to_repo, "/results/wgcna_files/ref",
# which.sample,"dissTOM_", sample.name[1], "_TC9.RData")) # might be useful to save the sim_matrix and
load(paste0(path_to_repo, "/results/wgcna_files/ref",
which.sample,"dissTOM_", sample.name[1], "_TC9.RData")) # load it up
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average")
writeLines("Plotting the resulting clustering tree (dendrogram)")
## Plotting the resulting clustering tree (dendrogram)
# sizeGrWindow(12,9)
# reset plotting parameter
par(mfrow = c(1,1))
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
labels = FALSE, hang = 0.04)
User defined parameters:
# We like large modules, so we set the minimum module size relatively high:
minModuleSize = 50;
# Module identification using dynamic tree cut:
dynamicMods= cutreeDynamic(dendro = geneTree,
distM = dissTOM,
method = "hybrid",
verbose = 4,
deepSplit = 3,
# see WGCNA for more info on tuning parameters
pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
## ..cutHeight not given, setting it to 0.995 ===> 99% of the (truncated) height range in dendro.
## ..Going through the merge tree
##
## ..Going through detected branches and marking clusters..
## ..Assigning Tree Cut stage labels..
## ..Assigning PAM stage labels..
## ....assigned 5159 objects to existing clusters.
## ..done.
# view number of genes in each module
# table(dynamicMods)
writeLines("How many genes are there in each of the initial modules (clusters) detected?
Note: The names of the modules (colors) have no meaning.")
## How many genes are there in each of the initial modules (clusters) detected?
## Note: The names of the modules (colors) have no meaning.
# Convert numeric lables into colors
dynamicColors = WGCNA::labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
## black blue brown cyan darkgreen
## 521 880 745 233 117
## darkgrey darkmagenta darkolivegreen darkorange darkred
## 108 66 66 100 125
## darkturquoise green greenyellow grey60 lightcyan
## 114 620 342 200 201
## lightgreen lightyellow magenta midnightblue orange
## 166 154 431 205 106
## paleturquoise pink purple red royalblue
## 72 448 378 585 151
## saddlebrown salmon sienna3 skyblue steelblue
## 88 253 62 88 79
## tan turquoise violet white yellow
## 273 1068 69 99 657
## yellowgreen
## 55
User defined parameters:
# Calculate eigengenes
MEList = WGCNA::moduleEigengenes(datExpr, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs, method = "kendall");
writeLines("Clustering the module eigengenes and identifying a cutoff to merge similar modules")
## Clustering the module eigengenes and identifying a cutoff to merge similar modules
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
# Plot the result
# sizeGrWindow(7, 8)
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "MEDiss = 1-cor(MEs, method = 'kendall')")
# We choose a height cut of 0.25, corresponding to correlation of 0.75, to merge
MEDissThres = 0.25 # user-specified parameter value; see WGCNA manual for more info
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
writeLines(paste0("Merging modules that have a correlation ≥ ", 1-MEDissThres))
## Merging modules that have a correlation ≥ 0.75
# Call an automatic merging function
merge = WGCNA::mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
## mergeCloseModules: Merging modules whose distance is less than 0.25
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 36 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 16 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 12 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 11 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 11 module eigengenes in given set.
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;
writeLines("Plotting the identified clusters (denoted with colors) before and after merging.")
## Plotting the identified clusters (denoted with colors) before and after merging.
# sizeGrWindow(12, 9)
WGCNA::plotDendroAndColors(geneTree,
cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
# Rename to moduleColors
moduleColors = mergedColors
# Construct numerical labels corresponding to the colors
colorOrder = c("grey",
WGCNA::standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1
writeLines("Calculating module-module similarity based on module-eigengene-expression.")
## Calculating module-module similarity based on module-eigengene-expression.
# Calculate similarity of the eigen-genes
sim_matrix_ME <- cor(mergedMEs, method = "kendall")
# calculate adj_matrix
adj_matrix_ME <- WGCNA::adjacency.fromSimilarity(sim_matrix_ME,
power=1, # DO NOT power transform
type='signed'
)
## CHANGE THE NAMES OF THE MODULES;
module_ids <- data.frame(
old_labels = rownames(adj_matrix_ME) %>%
str_split("ME", 2) %>%
sapply("[", 2) %>%
as.character(),
new_labels = paste0(
"C",
1:nrow(adj_matrix_ME)
)
)
# and coerce into matrix
adj_matrix_ME <- matrix(adj_matrix_ME, nrow=nrow(adj_matrix_ME))
rownames(adj_matrix_ME) <- module_ids$new_labels
colnames(adj_matrix_ME) <- module_ids$new_labels
# ## KEEP THE SAME MODULE NAMES (named by color)
# gene_ids <- rownames(adj_matrix_ME)
# # coerce into a matrix
# adj_matrix_ME <- matrix(adj_matrix_ME, nrow=nrow(adj_matrix_ME))
# rownames(adj_matrix_ME) <- gene_ids
# colnames(adj_matrix_ME) <- gene_ids
writeLines("Plotting the adjacency matrix that shows module-module similarity in expression")
## Plotting the adjacency matrix that shows module-module similarity in expression
gplots::heatmap.2(t(adj_matrix_ME),
col=inferno(100),
# labRow=NA, labCol=NA,
trace='none', dendrogram='row',
xlab='', ylab='',
# main='Similarity matrix - MEs \n correlation method = "kendall")',
main='Adjacency matrix - MEs \n (module-module similarity)',
density.info='none', revC=TRUE)
pacman::p_load(igraph)
adj_matrix_ME_igraph <- adj_matrix_ME
# get rid of low correlations (0.6 & 0.8 are arbitrary) [0.7 and 0.9]
adj_matrix_ME_igraph[adj_matrix_ME_igraph < 0.6] <- 0
adj_matrix_ME_igraph[adj_matrix_ME_igraph < 0.8 & adj_matrix_ME_igraph>0] <- 0.5
adj_matrix_ME_igraph[adj_matrix_ME_igraph >= 0.9] <- 1
# build_network
network <- graph.adjacency(adj_matrix_ME_igraph,
mode = "upper",
weighted = T,
diag = F)
# simplify network
network <- igraph::simplify(network) # removes self-loops
# remove isolated vertices (keep only the nodes)
isolated <- which(degree(network)==0)
network <- igraph::delete.vertices(network, isolated)
# E(network)$width <- E(network)$weight + min(E(network)$weight) + 1 # offset=1
# colors <- as.character(module_ids$old_labels)
# V(network)$color <- colors
V(network)$color <- "white"
# genes_ME <- factor(moduleColors, levels=colors) %>% summary()
V(network)$size <- igraph::degree(network, mode = "all")*3
# V(network)$size <- log2(genes_ME)^1.3
V(network)$label.color <- "black"
V(network)$frame.color <- "black"
E(network)$width <- E(network)$weight^2*4
E(network)$color <- "black"
# ## highlight shortest paths between two vetices
# short.path <- igraph::get.shortest.paths(network, "S_5", "S_15")
# E(network, path = unlist(short.path[[1]]))$color <- col.scheme[2]
# E(network, path = unlist(short.path[[1]]))$width <- E(network)$weight*8
writeLines("Visualizing a simplified representation of the circadian GCN, with and without labels")
## Visualizing a simplified representation of the circadian GCN, with and without labels
# trash <- dev.off()
par(mfrow = c(1,1))
# png(paste0(path_to_repo, "/results/figures/", sample.name[1], "/", script.name,"/", sample.name[1],"_GCN_2.png"),
# width = 20, height = 30, units = "cm", res = 600)
# par(bg=NA)
plot(network,
size=20,
layout=layout.kamada.kawai,
# layout=layout.fruchterman.reingold,
# layout=layout.graphopt,
# layout=layout_in_circle,
# vertex.label=NA
vertex.size=hub.score(network)$vector*30
# vertex.shape="none"
)
# dev.off()
# Make a list that returns gene names for a given cluster
module_genes <- list()
modules.to.exclude <- c("")
# modules.to.exclude <- c(paste0("C",c(2,5,6,7,10:17,19)))
which.modules <- module_ids %>% filter(!new_labels %in% modules.to.exclude) %>% pull(old_labels)
which.labels <- module_ids %>% filter(!new_labels %in% modules.to.exclude) %>% pull(new_labels)
# Get the genes from each of the modules
for (i in 1:length(which.modules)) {
# which color
mod.color = as.character(which.modules[[i]])
# subset
module_genes[[i]] <- names(datExpr)[which(moduleColors==mod.color)]
names(module_genes)[[i]] <- as.character(which.labels[[i]])
}
# # check the result | works
# names(module_genes)
# module_genes['C22']
# [13 Dec 2021]
# Save a csv with the module identity information for all genes used in building the GCN
# make a dataframe with gene_name and module_identity
for (i in 1:length(module_genes)){
if (i == 1){
pbar.ld.mods <- data.frame(gene_name = module_genes[[i]],
module_identity = as.character(names(module_genes)[i]))
}
else{
foo <- data.frame(gene_name = module_genes[[i]],
module_identity = as.character(names(module_genes)[i]))
pbar.ld.mods <- rbind(pbar.ld.mods, foo)
}
}
# save the dataframe as a csv
pbar.ld.mods %>%
left_join(module_ids, by = c("module_identity" = "new_labels")) %>%
write.csv(.,
paste0(path_to_repo,
"/results/wgcna_files/res/pbar_ld_module_identity_new_labels.csv"),
row.names = F)
# done.
# # save a copy with all the gene annotations
# # load Cflo gene annotations
# cflo_annots <- read.csv(paste0(path_to_repo,"/functions/func_data/cflo_annots.csv"),
# header=T, stringsAsFactors = F, na.strings = c(NA,""," "))
# pbar.ld.mods %>%
# left_join(cflo_annots, by="gene_name") %>% head()
# write.csv(.,
# paste0(path_to_repo,
# "/results/WGCNA/cflo/cflo_heads_control_module_identity_new_labels_annots.csv"),
# row.names = F)
# load the list of all genes of interest
load(file = paste0(path_to_repo,"/results/genes_of_interest/goi_list.RData"))
sapply(goi.list[[1]][[1]], length)
## pbar_ld_24h pbar_ld_12h pbar_ld_08h
## 1536 179 354
## Rhythmic genes
rhy24.ld <- goi.list[[1]][[1]][[1]]
rhy24.dd <- goi.list[[1]][[2]][[1]]
## Ultradian genes
rhy12.ld <- goi.list[[1]][[1]][[2]]
rhy12.dd <- goi.list[[1]][[2]][[2]]
rhy08.ld <- goi.list[[1]][[1]][[3]]
rhy08.dd <- goi.list[[1]][[2]][[3]]
# ## DRGs
# for.brain.head.rhy24.cluster1 <- goi.list[[2]]
# for24.nur8 <- goi.list[[6]][[1]]
Modules vs. Rhythmic genes
writeLines("#####################################################
How many genes are in each of my geneset of interest?
#####################################################")
## #####################################################
## How many genes are in each of my geneset of interest?
## #####################################################
## MAKE YOUR LIST OF GENES OF INTEREST ##
# LIST ONE - WGCNA modules
list1 <- module_genes
writeLines("List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules")
## List of interesting genes #1
## ----------------------------
## Genes in each of the identified gene-clusters or modules
sapply(list1, length)
## C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11
## 810 1855 246 200 2015 69 233 99 143 2225 2030
## LIST TWO - rhythmic genes
list2 <- list(rhy24.ld,
rhy24.dd,
rhy12.ld,
rhy12.dd,
rhy08.ld,
rhy08.dd
)
writeLines("List of interesting genes #2
----------------------------
Rhythmic genes in control Cflo heads")
## List of interesting genes #2
## ----------------------------
## Rhythmic genes in control Cflo heads
names(list2) <- c("rhy24-Pbar-LD",
"rhy24-Pbar-DD",
"rhy12-Pbar-LD",
"rhy12-Pbar-DD",
"rhy08-Pbar-LD",
"rhy08-Pbar-DD"
)
sapply(list2, length)
## rhy24-Pbar-LD rhy24-Pbar-DD rhy12-Pbar-LD rhy12-Pbar-DD rhy08-Pbar-LD
## 1536 1547 179 341 354
## rhy08-Pbar-DD
## 717
## CHECK FOR OVERLAP
## make a GOM object
gom.1v2 <- newGOM(list2, list1,
genome.size = nGenes)
# png(paste0(path_to_repo, "/results/figures/",
# "02_pogo_GCN/",
# sample.name[1],"_gom_1v2.png"),
# width = 35, height = 15, units = "cm", res = 300)
drawHeatmap(gom.1v2,
adj.p=T,
cutoff=0.05,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "orange")
# trash <- dev.off()
# writeLines("How many genes exactly are overlapping between the pairwise comparisons")
# getMatrix(gom.1v4, name = "intersection") %>% t()
writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
which.modules <- c("C1","C2",
"C10","C11")
module.plots <- list()
for (i in 1: length(which.modules)) {
writeLines(paste0("Plotting expression for module ",
which.modules[i]))
module.plots[[i]] <-
module_genes[[which.modules[[i]]]] %>%
stacked.zplot(bg.alpha = .08, alpha = 1) %>%
multi.plot(rows = 2, cols = 1)
}
## Plotting expression for module C1
## Plotting expression for module C2
## Plotting expression for module C10
## Plotting expression for module C11
module.plots <- module.plots |>
setNames(which.modules)
# png(paste0(path_to_repo, "/results/figures/", sample.name[1],"module_exp.png"),
# width = 25, height = 40, units = "cm", res = 400)
# module.plots %>%
# multi.plot(rows = 3, cols = 2)
# trash <- dev.off()
paste0("C", 1:11) |>
purrr::map(
function(m) {
module_genes[[m]] |>
amplitude.plot(
stats = FALSE
) +
labs(
title = m
) +
scale_y_continuous(
n.breaks = 3
) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
}
) |>
multi.plot(rows = 3, cols = 4)
## TableGrob (3 x 4) "arrange": 11 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (1-1,3-3) arrange gtable[layout]
## 4 4 (1-1,4-4) arrange gtable[layout]
## 5 5 (2-2,1-1) arrange gtable[layout]
## 6 6 (2-2,2-2) arrange gtable[layout]
## 7 7 (2-2,3-3) arrange gtable[layout]
## 8 8 (2-2,4-4) arrange gtable[layout]
## 9 9 (3-3,1-1) arrange gtable[layout]
## 10 10 (3-3,2-2) arrange gtable[layout]
## 11 11 (3-3,3-3) arrange gtable[layout]
From WGCNA-tutorial
“We begin by calculating the intramodular connectivity for each gene. (In network literature, connectivity is often referred to as”degree”.) The function intramodularConnectivity computes:
# From what I can tell, colorh1 in the tutorial refers to moduleColors
colorh1 <- moduleColors
# Calculate the connectivities of the genes
Alldegrees1=intramodularConnectivity(adjMat = adj_matrix, colors = colorh1)
Alldegrees1 <-
Alldegrees1 %>%
rownames_to_column("gene_name") %>%
mutate_at(vars(starts_with("k")),
function(x){
round(x,2)
})
head(Alldegrees1)
## gene_name kTotal kWithin kOut kDiff
## 1 LOC105421953 50.66 42.40 8.27 34.13
## 2 LOC105421955 29.61 17.94 11.67 6.27
## 3 LOC105421956 30.19 15.89 14.31 1.58
## 4 LOC105421958 137.18 125.42 11.76 113.66
## 5 LOC105421959 29.29 15.82 13.47 2.35
## 6 LOC105421960 45.23 31.11 14.12 16.98
Generalizing intramodular connectivity for all genes
“The intramodular connectivity measure is only defined for the genes inside a given module. But in practice it can be very important to measure how connected a given genes is to biologically interesting modules. Toward this end, we define a module eigengene-based connectivity measure for each gene as the correlation between a the gene expression and the module eigengene.
Specifically,
kMEbrown(i) = cor(xi,MEbrown)
where xi is the gene expression profile of gene i and MEbrown is the module eigengene of the brown module. Note: that the definition does not require that gene i is a member of the brown module. We define a data frame containing the module membership (MM) values for each module. In the past, we called the module membership values kME.”
Calculate the signed kME and display the first few rows/columns.
datKME=signedKME(datExpr, mergedMEs, outputColumnName="")
# Display the first few rows of the data frame
datKME[1:6,1:6]
## black lightyellow darkgrey grey60 blue
## LOC105421953 0.44222741 -0.25047902 0.3573258 0.39778640 0.7453533
## LOC105421955 -0.60646385 -0.56298796 0.2457627 0.55570755 -0.1096598
## LOC105421956 0.66900676 0.11698937 0.1002657 0.04285703 0.6500437
## LOC105421958 0.70323811 -0.09883344 -0.0593568 0.24758477 0.8761597
## LOC105421959 -0.01839094 -0.42274637 0.1159919 0.49110237 0.3959603
## LOC105421960 -0.65970317 -0.59076495 0.6733935 0.57430514 -0.1134845
## violet
## LOC105421953 0.53984393
## LOC105421955 -0.01133061
## LOC105421956 0.39863896
## LOC105421958 0.57534334
## LOC105421959 0.52640431
## LOC105421960 -0.09532808
Let’s plot the connectivity vs. module-membership values for all identified clusters.
# colorlevels=as.character(module_ids$old_labels)
# # sizeGrWindow(9,6)
#
# png(paste0(path_to_repo, "/results/figures/", sample.name[1],"/", script.name, "/",
# sample.name[1],"_connectivity_kWithin_v_modulemembership.png"),
# width = 30, height = 40, units = "cm", res = 300)
#
# par(mfrow=c(as.integer(0.5+length(colorlevels)/4), 4))
# par(mar = c(3,3,2,2))
# # par(mar = c(4,5,3,1))
# for (i in c(1:length(colorlevels))) {
# whichmodule=colorlevels[[i]];
# restrict1 = (colorh1==whichmodule);
# verboseScatterplot(Alldegrees1$kWithin[restrict1],
# datKME[restrict1, whichmodule],
# col= rgb(red=169, green=169, blue=169, alpha = 80, maxColorValue = 255),
# # bg = colorh1[restrict1],
# bg = rgb(red=169, green=169, blue=169, alpha = 30, maxColorValue = 255),
# pch = 21,
# lwd = 1.5,
# cex = 1.2,
#
# # main=whichmodule,
# main=module_ids %>% filter(old_labels==whichmodule) %>% pull(new_labels) %>% as.character(),
# xlab = "Connectivity_kWithin", ylab = "Module-membership", abline = TRUE)
# }
#
# trash <- dev.off()
Plotting the mean (± 95% CI) connectivity of genes in different modules
#
# # Make the summary function (borrowed)
# summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
# conf.interval=.95, .drop=TRUE) {
#
# # New version of length which can handle NA's: if na.rm==T, don't count them
# length2 <- function (x, na.rm=FALSE) {
# if (na.rm) sum(!is.na(x))
# else length(x)
# }
#
# # This does the summary. For each group's data frame, return a vector with
# # N, mean, and sd
# datac <- plyr::ddply(data, groupvars, .drop=.drop,
# .fun = function(xx, col) {
# c(N = length2(xx[[col]], na.rm=na.rm),
# mean = mean(xx[[col]], na.rm=na.rm),
# sd = stats::sd(xx[[col]], na.rm=na.rm)
# )
# },
# measurevar
# )
#
# # Rename the "mean" column
# datac <- plyr::rename(datac, c("mean" = measurevar))
#
# datac$se <- datac$sd / sqrt(as.numeric(datac$N)) # Calculate standard error of the mean
#
# # Confidence interval multiplier for standard error
# # Calculate t-statistic for confidence interval:
# # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
# ciMult <- qt(conf.interval/2 + .5, datac$N-1)
# datac$ci <- datac$se * ciMult
#
# return(datac)
# }
pd <- position_dodge(0.1)
Alldegrees1 %>%
# rownames_to_column("gene_name") %>%
left_join(pbar.ld.mods, by="gene_name") %>%
# PLOT FROM RAW DATA
mutate(module_identity = factor(module_identity,
levels = paste0("C",1:nrow(module_ids)))) %>%
# ggplot(aes(x=module_identity, y=kTotal)) +
# # ggplot(aes(x=module_identity, y=kWithin)) +
#
# geom_hline(yintercept = 45, col="darkgrey", alpha=0.8) +
# geom_hline(yintercept = 30, col="darkgrey", alpha=0.8) +
# # geom_hline(yintercept = 75, col="darkgrey", alpha=0.8) +
#
# geom_boxplot(fill="lightgrey",
# alpha=0.6,
# outlier.color = "grey60",
# position = "dodge2") +
#
# theme_Publication() +
# scale_colour_Publication() +
# xlab("") +
# ggtitle("") +
#
# ylab("Total connectivity") +
# # ylab("Intramodular connectivity") +
#
# theme(text = element_text(size = 20, colour = 'black'),
# legend.position = "none",
# axis.line.y = element_line(colour = "transparent",size=1)) +
# coord_flip()
# SUMMARIZE DATA AND PLOT
# summarySE(., measurevar = "kWithin", groupvars = "module_identity") %>%
summarySE(., measurevar = "kTotal", groupvars = "module_identity") %>%
# make the value column
mutate(value=kTotal) %>%
mutate(module_identity = factor(module_identity,
levels = paste0("C",1:nrow(module_ids)))) %>%
# Plot
ggplot(aes(x=((module_identity)), y=value)) +
theme_Publication() +
scale_colour_Publication() +
xlab("") +
ylab("Total connectivity") +
ggtitle("") +
#
# theme(axis.title.x = element_blank(),
# axis.title.y = element_blank(),
# legend.position='none',
# # legend.title = element_text(size = 15, colour = 'black'),
# legend.text = element_blank()) +
# ## center align the title
# theme(plot.title = element_blank()) +
#
# scale_x_continuous(breaks = c(0,12,24)) +
#scale_y_continuous(limits = c(0,40)) +
# theme(panel.grid.major.x = element_line(colour = "#808080", size=0.1),
# panel.grid.major.y = element_line(colour = "#808080", size=0.2)) +
# ## if you need highlighting parts of the graph
# geom_rect(aes(xmin = 11.8, xmax = 23.8, ymin = -Inf, ymax = Inf),
# fill = "lightgrey", alpha = 0.02, color=NA) +
# geom_line(position=pd,
# col="#F2CB05", size=2, alpha=1) + # total
# # col="#BF0404", size=2, alpha=1) + # FS
# # col="#0FBF67", size=2, alpha=1) + # FA
## Add error bar here
geom_errorbar(aes(ymin=value-ci, ymax=value+ci),
width=.4, position=pd, col="black", alpha = 0.7) +
# Add the points on top of the error bars
geom_point(position=pd, size=2.5,
col="black", fill="grey60",
show.legend = F, color="black", pch=21, alpha=0.9) +
#facet_wrap(~Phase, nrow=2)
# scale_fill_manual(values = c("black","#F20505","#F2CB05","#0FBF67")) +
# scale_color_manual(values=c("#F20505","#F5D736","#0FBF67")) +
theme(text = element_text(size = 20, colour = 'black'),
legend.position = "none") +
coord_flip()
# [as per reviewer request]
# forcing the y-axis to start at 0
# expand_limits(x = 0, y = 0)
Next, we can look at betweenness centrality:
# network.complete <- graph_from_adjacency_matrix(adj_matrix,
# mode = "undirected",
# weighted = T)
#
# V(network.complete) %>% length()
# E(network.complete) %>% length()
# Read 2020_Friedman_et_al data -------------------------------------------
df.2020 <- read.csv(paste0(path_to_pogodata, "2020_friedman_Pbar_TPM_TraitCorr_dNdS.csv"),
header = T, stringsAsFactors = F, na.strings = c(""," ",".", "NA")) %>% as_tibble()
# note, it is unclear what the columns actually mean,
# but, below find an example of what we could do.
# Assumption: "humidity_cor" represents the correlation of gene expression to humidity changes
## Keep relevant columns
df.2020 <-
df.2020 %>%
select(pogo_gene = LOC,
humidity_cor:da_5ht_cor_colony_mean)
## Deciding on the threshold
## the first and the third quartiles
# Set humidity-expression correlation threshold
h.threshold <- quantile(df.2020$humidity_cor, prob=c(.05,.95), type=1)
# obtain pogo genes that satisfied the threshold
h.genes.pos <-
df.2020 %>%
as_tibble() %>%
select(pogo_gene,
humidity_cor) %>%
na.omit() %>%
filter(humidity_cor > h.threshold[2]) %>%
pull(pogo_gene)
h.genes.neg <-
df.2020 %>%
as_tibble() %>%
select(pogo_gene,
humidity_cor) %>%
na.omit() %>%
filter(humidity_cor < h.threshold[1]) %>%
pull(pogo_gene)
The top and bottom 5% gaves us a set of 516 positively and a set of 516 negatively correlated pogo genes.
Now, let’s see which modules, if any, in the cflo brain GCN do these genes reside?
The names have the following meanings:
Modules vs. Rhythmic|BehavioralPlasticity genes
writeLines("#####################################################
How many genes are in each of my geneset of interest?
#####################################################")
## #####################################################
## How many genes are in each of my geneset of interest?
## #####################################################
## MAKE YOUR LIST OF GENES OF INTEREST ##
# LIST ONE - WGCNA modules
list1 <- module_genes
writeLines("List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules")
## List of interesting genes #1
## ----------------------------
## Genes in each of the identified gene-clusters or modules
sapply(list1, length)
## C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11
## 810 1855 246 200 2015 69 233 99 143 2225 2030
## LIST TWO - rhythmic genes
list3 <- list(rhy24.ld,
rhy24.dd,
rhy12.ld,
rhy12.dd,
rhy08.ld,
rhy08.dd,
h.genes.pos,
h.genes.neg
)
writeLines("List of interesting genes #2
----------------------------
Rhythmic genes in control Cflo heads")
## List of interesting genes #2
## ----------------------------
## Rhythmic genes in control Cflo heads
names(list3) <- c("rhy24-Pbar-LD",
"rhy24-Pbar-DD",
"rhy12-Pbar-LD",
"rhy12-Pbar-DD",
"rhy08-Pbar-LD",
"rhy08-Pbar-DD",
"pogo-rH-pos",
"pogo-rH-neg"
)
sapply(list3, length)
## rhy24-Pbar-LD rhy24-Pbar-DD rhy12-Pbar-LD rhy12-Pbar-DD rhy08-Pbar-LD
## 1536 1547 179 341 354
## rhy08-Pbar-DD pogo-rH-pos pogo-rH-neg
## 717 516 516
writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
## CHECK FOR OVERLAP
## make a GOM object
gom.1v3 <- newGOM(list3, list1,
genome.size = nGenes)
# png(paste0(path_to_repo, "/results/figures/",
# "02_pogo_GCN/",
# sample.name[1],"_gom_1v2.png"),
# width = 35, height = 15, units = "cm", res = 300)
drawHeatmap(gom.1v3,
adj.p=T,
cutoff=0.05,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "orange")
# trash <- dev.off()
writeLines("How many genes exactly are overlapping between the pairwise comparisons")
## How many genes exactly are overlapping between the pairwise comparisons
getMatrix(gom.1v3, name = "intersection") %>% t()
## rhy24-Pbar-LD rhy24-Pbar-DD rhy12-Pbar-LD rhy12-Pbar-DD rhy08-Pbar-LD
## C1 139 168 11 8 8
## C2 720 510 17 20 34
## C3 68 19 0 9 3
## C4 54 17 0 1 5
## C5 30 246 22 54 44
## C6 4 4 0 3 0
## C7 14 42 2 7 2
## C8 23 7 0 2 2
## C9 0 22 0 17 2
## C10 18 222 21 65 24
## C11 354 134 28 43 60
## rhy08-Pbar-DD pogo-rH-pos pogo-rH-neg
## C1 74 134 5
## C2 47 202 20
## C3 17 3 18
## C4 9 4 9
## C5 123 108 36
## C6 6 2 5
## C7 12 0 17
## C8 2 2 4
## C9 2 13 0
## C10 135 18 179
## C11 144 13 202
pogo.for24 <-
cflo.for24 %>% # 3569 cflo genes with 24h rhythms in forager brains
cflo_to_pogo() # 3265 cflo genes have a one-to-one pbar ortholog
pogo.for24nur8 <-
cflo.for24nur8 %>% # 291 cflo genes with 24h rhythms in forager brains, but 8h in nurses
cflo_to_pogo() # 276 cflo genes have a one-to-one pbar ortholog
The top and bottom 5% gaves us a set of 516 positively and a set of 516 negatively correlated pogo genes.
Now, let’s see which modules, if any, in the cflo brain GCN do these genes reside?
The names have the following meanings:
pogo-rH-pos = Pogonomyrmex genes whose expression show a positive correlation with collective response to abiotic changes
pogo-rH-neg = Pogonomyrmex genes whose expression show a negative correlation with collective response to abiotic changes
for24-Cflo = P. barbatus orthologs of Cflo genes that show 24h rhythms in forager brains
for24nur8-Cflo = P. barbatus orthologs of Cflo genes that show 24h rhythms in forager brains, but 8h oscillations in nurse brains
Modules vs. Rhythmic|BehavioralPlasticity genes - V2
writeLines("#####################################################
How many genes are in each of my geneset of interest?
#####################################################")
## #####################################################
## How many genes are in each of my geneset of interest?
## #####################################################
## MAKE YOUR LIST OF GENES OF INTEREST ##
# LIST ONE - WGCNA modules
list1 <- module_genes
writeLines("List of interesting genes #1
----------------------------
Genes in each of the identified gene-clusters or modules")
## List of interesting genes #1
## ----------------------------
## Genes in each of the identified gene-clusters or modules
sapply(list1, length)
## C1 C2 C3 C4 C5 C6 C7 C8 C9 C10 C11
## 810 1855 246 200 2015 69 233 99 143 2225 2030
## LIST TWO - rhythmic genes
list4 <- list(rhy24.ld,
rhy24.dd,
h.genes.pos,
h.genes.neg,
pogo.for24,
pogo.for24nur8
)
writeLines("List of interesting genes #2
----------------------------
Rhythmic genes in control Cflo heads")
## List of interesting genes #2
## ----------------------------
## Rhythmic genes in control Cflo heads
names(list4) <- c("rhy24-Pbar-LD",
"rhy24-Pbar-DD",
"pogo-rH-pos",
"pogo-rH-neg",
"for24-Cflo",
"for24nur8-Cflo"
)
sapply(list4, length)
## rhy24-Pbar-LD rhy24-Pbar-DD pogo-rH-pos pogo-rH-neg for24-Cflo
## 1536 1547 516 516 3265
## for24nur8-Cflo
## 276
writeLines("Visualizing the significant overlaps between your lists of interesting genes and the identified modules")
## Visualizing the significant overlaps between your lists of interesting genes and the identified modules
## CHECK FOR OVERLAP
## make a GOM object
gom.1v4 <- newGOM(list4, list1,
genome.size = nGenes)
# png(paste0(path_to_repo, "/results/figures/",
# "02_pogo_GCN/",
# sample.name[1],"_gom_1v2.png"),
# width = 35, height = 15, units = "cm", res = 300)
drawHeatmap(gom.1v4,
adj.p=T,
cutoff=0.05,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "orange")
# trash <- dev.off()
writeLines("How many genes exactly are overlapping between the pairwise comparisons")
## How many genes exactly are overlapping between the pairwise comparisons
getMatrix(gom.1v4, name = "intersection") %>% t()
## rhy24-Pbar-LD rhy24-Pbar-DD pogo-rH-pos pogo-rH-neg for24-Cflo
## C1 139 168 134 5 315
## C2 720 510 202 20 633
## C3 68 19 3 18 71
## C4 54 17 4 9 54
## C5 30 246 108 36 501
## C6 4 4 2 5 23
## C7 14 42 0 17 65
## C8 23 7 2 4 20
## C9 0 22 13 0 21
## C10 18 222 18 179 751
## C11 354 134 13 202 765
## for24nur8-Cflo
## C1 59
## C2 101
## C3 2
## C4 2
## C5 52
## C6 1
## C7 2
## C8 0
## C9 3
## C10 28
## C11 23
writeLines("What are the corresponding Odds-ratio?")
## What are the corresponding Odds-ratio?
getMatrix(gom.1v4, name = "odds.ratio") %>% t() %>% round(2)
## rhy24-Pbar-LD rhy24-Pbar-DD pogo-rH-pos pogo-rH-neg for24-Cflo
## C1 1.14 1.47 4.53 0.10 1.33
## C2 5.64 2.57 3.02 0.17 1.07
## C3 2.14 0.45 0.22 1.46 0.82
## C4 2.06 0.50 0.37 0.86 0.75
## C5 0.06 0.71 1.04 0.28 0.62
## C6 0.33 0.33 0.54 1.43 1.02
## C7 0.34 1.20 0.00 1.45 0.78
## C8 1.66 0.41 0.37 0.77 0.51
## C9 0.00 0.98 1.84 0.00 0.35
## C10 0.03 0.53 0.12 1.91 1.05
## C11 1.20 0.32 0.09 2.67 1.30
## for24nur8-Cflo
## C1 3.22
## C2 2.60
## C3 0.28
## C4 0.35
## C5 0.91
## C6 0.51
## C7 0.30
## C8 0.00
## C9 0.75
## C10 0.38
## C11 0.35
writeLines("What are the corresponding p-values (Fisher's test)?")
## What are the corresponding p-values (Fisher's test)?
getMatrix(gom.1v4, name = "pval") %>% t() %>% round(2)
## rhy24-Pbar-LD rhy24-Pbar-DD pogo-rH-pos pogo-rH-neg for24-Cflo
## C1 0.09 0.00 0.00 1.00 0.00
## C2 0.00 0.00 0.00 1.00 0.11
## C3 0.00 1.00 1.00 0.09 0.93
## C4 0.00 1.00 0.99 0.72 0.97
## C5 1.00 1.00 0.38 1.00 1.00
## C6 1.00 1.00 0.88 0.29 0.51
## C7 1.00 0.17 1.00 0.10 0.96
## C8 0.03 1.00 0.97 0.76 1.00
## C9 1.00 0.56 0.04 1.00 1.00
## C10 1.00 1.00 1.00 0.00 0.17
## C11 0.00 1.00 1.00 0.00 0.00
## for24nur8-Cflo
## C1 0.00
## C2 0.00
## C3 0.99
## C4 0.98
## C5 0.75
## C6 0.86
## C7 0.99
## C8 1.00
## C9 0.76
## C10 1.00
## C11 1.00
h.genes.pos %>%
stacked.zplot(bg.alpha = .08, alpha = 1) %>%
multi.plot(rows = 2, cols = 1)
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
h.genes.neg %>%
stacked.zplot(bg.alpha = .08, alpha = 1) %>%
multi.plot(rows = 2, cols = 1)
## TableGrob (2 x 1) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (2-2,1-1) arrange gtable[layout]
There are several things we could output from our analyses, we decided to report the following in the supplementary file:
# module_pfams <- list()
#
# bg.genes <- dat[[1]] ## all genes used to make the network
# which.test <- "pfams"
#
# for (i in 1:length(which.labels)) {
#
# # get name of the module
# m <- which.labels[[i]]
#
# # save the enrichment results
# module_pfams[[i]] <-
# module_genes[[m]] %>%
# check_enrichment(.,
# bg = dat[[1]],
# what = which.test,
# org = "cflo",
# clean = T,
# expand = T,
# plot = F)
# }
#
# ## Now clean it up
# sapply(module_pfams, nrow)
# c <- 0
# for (i in 1:length(module_pfams)) {
#
# if(is.null(nrow(module_pfams[[i]]))) {
# paste(which.labels[[i]],"is null") %>% print()
# } else if(nrow(module_pfams[[i]])==0) {
# paste(which.labels[[i]], "is an empty tibble") %>% print()
# } else {
#
# if(c==0) {
# c <- c+1
# module.pfams <- module_pfams[[i]]
# } else {
# module.pfams <- rbind(module.pfams, module_pfams[[i]])
# }
# }
# }
#
# ## change the name of the column
# module.pfams <-
# module.pfams %>%
# select(gene_name, enriched_in_module=annot_desc) %>%
# filter(enriched_in_module!="no_desc")
#
# # check the output dataframe
# module.pfams %>% head()
Let’s make the file
# results.gcn <-
# pbar.ld.mods %>%
#
# pull(gene_name) %>%
#
# ## rhythmicity data
# TC7_annotator() %>%
# select(gene_name, everything()) %>%
#
# ## cluster identity
# left_join(pbar.ld.mods, by="gene_name") %>%
#
# ## add total connectivity data
# left_join(Alldegrees1 %>% select(gene_name,kTotal), by="gene_name") %>%
#
# ## add the enriched pfam
# left_join(module.pfams, by="gene_name") %>%
#
# # Geneset: "for.brain.head.rhy24.cluster1" | DRGs (disease-associated)
# mutate(rhy_brain_head_cluster1 = ifelse(gene_name %in% for.brain.head.rhy24.cluster1, "yes", "no")) %>%
# # Geneset: "for24.nur8" | DRGs (caste-associated)
# mutate(for24_nur8 = ifelse(gene_name %in% for24.nur8, "yes", "no")) %>%
#
# ## Geneset: DEGs
# mutate(ocflo_UP = ifelse(gene_name %in% ocflo.up, "yes", "no")) %>%
# mutate(ocflo_DOWN = ifelse(gene_name %in% ocflo.down, "yes", "no")) %>%
# mutate(beau_UP = ifelse(gene_name %in% beau.up, "yes", "no")) %>%
# mutate(beau_DOWN = ifelse(gene_name %in% beau.down, "yes", "no")) %>%
#
# ## order the columns and rows
# select(module_identity, kTotal, enriched_in_module,
# gene_name, gene_desc = blast_annotation,
# rhy_brain_head_cluster1, for24_nur8,
# ocflo_UP, ocflo_DOWN, beau_UP, beau_DOWN,
# everything()) %>%
# arrange(desc(module_identity), enriched_in_module, desc(kTotal))
#
#
# # ## export it
# # write.csv(results.gcn,
# # file = paste0(path_to_repo, "/results/00_supplementary_files/07_Cflo_forager_head_GCN_results.csv"),
# # row.names = F)
# cgs.18 <- c("LOC105252466")
# cgs.19 <- c("LOC105250191", "LOC105256631")
# cgs.21 <- c("LOC105256454", "LOC105257275", "LOC105255207", "LOC105248529",
# "LOC105253861", "LOC105257836")
# cgs.22 <- c("LOC105252510", "LOC105258655", "LOC105251553", "LOC105256952",
# "LOC105252917", "LOC105249574", "LOC105259208")
#
# cgs <- list(cgs.18, cgs.19, cgs.21, cgs.22)
#
# plots <- list()
# for (i in 1:length(cgs)){
#
# plots[[i]] <-
# cgs[[i]] %>%
# stacked.zplot_tc7(bg.alpha = 0.4) %>%
# multi.plot(rows = 1, cols = 3)
# }
#
# # png(paste0(path_to_repo, "/results/figures/", sample.name[1],"/",
# # "clock_genes_daily_exp.png"),
# # width = 40, height = 20, units = "cm", res = 300)
# # plots %>%
# # multi.plot(rows = 1, cols = 4)
# # dev.off()